# -*- coding: utf-8 -*-
# @Time    : 2018/3/21 20:32
# @Author  : shiweixian

import re

# Integer code assigned to each amino-acid letter; '-' (code 21) is the gap symbol.
AMINO_ACID_CODES = {
    'A': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'K': 9, 'L': 10,
    'M': 11, 'N': 12, 'P': 13, 'Q': 14, 'R': 15, 'S': 16, 'T': 17, 'V': 18, 'W': 19,
    'Y': 20, '-': 21,
}

# A header field "names" the pH when it contains "ph" in any letter case.
_PH_FIELD = re.compile('PH', re.IGNORECASE)


def _extract_ph(header):
    """Return the field following the first pH marker in *header*, or None.

    *header* is one '_'-separated header line with surrounding whitespace
    already removed.  None is returned when no field contains "PH"
    (case-insensitive), when the marker is the last field (nothing follows
    it), or when the following field is empty.
    """
    fields = header.split('_')
    # Pair every field with its successor so a trailing marker cannot index
    # past the end of the list.
    for field, following in zip(fields, fields[1:]):
        if _PH_FIELD.search(field):
            return following or None
    return None


def _parse_ph(raw_ph):
    """Return the value to write for *raw_ph*.

    A range such as "5.0-6.0" becomes the mean of its first two parts as a
    float; any other text is returned unchanged.

    Raises:
        ValueError: if a part of a range is not a valid float.
    """
    if '-' in raw_ph:
        low, high = raw_ph.split('-')[:2]
        return (float(low) + float(high)) / 2
    return raw_ph


def _iter_records(lines):
    """Yield ``(header_line_number, raw_ph, sequence)`` for each record.

    A line containing '_' starts a new record (it is a header); every other
    non-blank line up to the next header is sequence text and is joined into
    one string, so wrapped sequences and a missing final newline are both
    handled.  Line numbers are 1-based.  ``raw_ph`` is None when the header
    carries no pH value.

    Raises:
        ValueError: if sequence text appears before the first header.
    """
    header_no = None
    raw_ph = None
    chunks = []
    for line_no, line in enumerate(lines, start=1):
        text = line.strip()
        if not text:
            continue
        if '_' in text:
            if header_no is not None:
                yield header_no, raw_ph, ''.join(chunks)
            header_no, raw_ph, chunks = line_no, _extract_ph(text), []
        elif header_no is None:
            raise ValueError(
                'line ' + str(line_no) + ': sequence data before any header')
        else:
            chunks.append(text)
    if header_no is not None:
        yield header_no, raw_ph, ''.join(chunks)


def main(fasta_path="TP.fas", ph_path="PH.txt", temperature_path="temperature.txt"):
    """Convert a pH-labelled sequence file into rows of integer codes.

    For every record in *fasta_path* that has both a pH value and a
    sequence, one line is written to *ph_path*: the space-separated integer
    codes of the residues followed by the pH value.  Records whose header
    has no pH are reported on stdout and skipped, so later records keep
    their own label.  Records without any sequence text are skipped.

    *temperature_path* is created (or truncated) but left empty: this script
    does not extract temperatures, it only keeps producing the file.

    Raises:
        KeyError: if a sequence contains a character missing from
            ``AMINO_ACID_CODES``.
        ValueError: if a pH range is malformed or sequence text precedes the
            first header.
    """
    with open(fasta_path, mode='r') as fasta_file, \
            open(ph_path, mode='w') as ph_file, \
            open(temperature_path, mode='w'):
        for header_no, raw_ph, sequence in _iter_records(fasta_file):
            if raw_ph is None:
                print('第' + str(header_no) + '行没有ph')
                continue
            if not sequence:
                continue
            columns = [str(AMINO_ACID_CODES[residue]) for residue in sequence]
            columns.append(str(_parse_ph(raw_ph)))
            ph_file.write(' '.join(columns) + '\n')


if __name__ == "__main__":
    main()
